In [1]:
import numpy as np
from scipy.stats import kurtosis, skew
import pandas as pd
import math
import matplotlib.pyplot as plt
import seaborn as sns
In [2]:
# Column labels for the header-less CSV, applied positionally after loading.
fragment_pair_columns = [
    'seq1', 'start1', 'end1', 'res1', 'chain1', 'pdb1',
    'seq2', 'start2', 'end2', 'res2', 'chain2', 'pdb2',
    'rms', 'occ', 'count',
]
df = pd.read_csv('aug 4 2018.csv', header=None)
df.columns = fragment_pair_columns
In [3]:
# Preview the first five rows to sanity-check the column assignment.
df.head(n=5)
Out[3]:
seq1 start1 end1 res1 chain1 pdb1 seq2 start2 end2 res2 chain2 pdb2 rms occ count
0 ALAASNLYS 683 685 1.94 A 4Y03 ALAASNLYS 362 364 3.20 A 4Y3C 2.83927 111 3
1 ALAASNLYS 683 685 1.94 A 4Y03 ALAASNLYS 444 446 1.66 A 4Y04 3.59824 111 3
2 ALAASNLYS 683 685 1.94 A 4Y03 ALAASNLYS 362 364 3.20 B 4Y3C 2.10710 111 3
3 ALAASNLYS 683 685 1.94 A 4Y03 ALAASNLYS 73 75 2.04 A 4Y4V 2.59099 111 3
4 ALAASNLYS 683 685 1.94 A 4Y03 ALAASNLYS 362 364 3.20 C 4Y3C 2.99060 111 3
In [4]:
# Gather every rms value that shares a seq1 into one list per seq1, then
# turn the seq1 group index back into an ordinary column.
id_rms_df = (
    df.groupby(['seq1'])['rms']
    .apply(list)
    .to_frame()
    .reset_index()
)
In [5]:
# Lookup from three-letter amino-acid code to one-letter code
# (the 20 standard residues).
amino_codes = {
    'ALA': 'A',
    'ARG': 'R',
    'ASN': 'N',
    'ASP': 'D',
    'CYS': 'C',
    'GLU': 'E',
    'GLN': 'Q',
    'GLY': 'G',
    'HIS': 'H',
    'ILE': 'I',
    'LEU': 'L',
    'LYS': 'K',
    'MET': 'M',
    'PHE': 'F',
    'PRO': 'P',
    'SER': 'S',
    'THR': 'T',
    'TRP': 'W',
    'TYR': 'Y',
    'VAL': 'V',
}
In [6]:
# NOTE(review): presumably the number of residues per fragment - TODO confirm.
n = 3

# Histogram bin edges: from 0 up to (but excluding) 3.1 in steps of 0.1.
num_bins = np.arange(0, 3.1, 0.1)

# Axis tick positions. The x ticks use the same values as the bin edges
# (kept as a separate array); the y ticks run from 0 in steps of 5, below 55.
x_x = num_bins.copy()
y_y = np.arange(0, 55.0, 5.0)

RMSD Distribution of 3-Residue Fragment Data

In [7]:
# Draw one RMSD histogram per fragment: each row of id_rms_df pairs a seq1
# string with the list of rms values grouped under it.
for fragment_seq, rms_values in zip(id_rms_df['seq1'], id_rms_df['rms']):
    fig, ax = plt.subplots(figsize=(12, 5))

    # Density-normalised histogram of this fragment's rms values. The bar
    # heights are bound to `densities` rather than `n` so the notebook-level
    # `n` is not overwritten by the loop.
    densities, bins, patches = ax.hist(rms_values, num_bins, density=True,
                                       edgecolor='black', linewidth=1.5)

    # Overlay a line through the bin midpoints rather than the bin edges.
    bin_centers = 0.5 * (bins[1:] + bins[:-1])
    ax.plot(bin_centers, densities, color='red')
    ax.set_xticks(x_x)
    ax.set_yticks(y_y)
    ax.set_xlabel(' -- RMSD -- ')
    ax.set_ylabel(' -- Frequency -- ')

    # seq1 is read as three consecutive 3-letter residue codes (characters
    # 0-2, 3-5 and 6-8); each is mapped to its one-letter code.
    fragment_label = '-'.join(
        amino_codes[fragment_seq[i:i + 3]] for i in range(0, 9, 3)
    )
    ax.text(2.3, 41, 'Fragment: ' + fragment_label, style='italic',
            bbox={'facecolor': 'red', 'alpha': 0.5, 'pad': 10})

    # Number of rms values behind this histogram (this box previously showed
    # the skew a second time under the 'Count' label).
    ax.text(2.3, 34, 'Count: ' + str(len(rms_values)), style='italic',
            bbox={'facecolor': 'yellow', 'alpha': 0.5, 'pad': 10})

    # Fixed-point formatting instead of slicing str(value): slicing can cut
    # the exponent off values that str() renders in scientific notation.
    ax.text(2.3, 27, 'Skew: ' + '{:.6f}'.format(skew(rms_values)), style='italic',
            bbox={'facecolor': 'yellow', 'alpha': 0.5, 'pad': 10})
    ax.text(2.3, 20, 'Kurtosis: ' + '{:.6f}'.format(kurtosis(rms_values)), style='italic',
            bbox={'facecolor': 'lightblue', 'alpha': 0.5, 'pad': 10})

    # Tweak spacing to prevent clipping of ylabel
    fig.tight_layout()
    plt.show()

    # Release the figure so figures do not accumulate across iterations.
    plt.close(fig)